github: https://github.com/avven1re/SDA_GroupAssignment
#devtools::install_github("amices/ggmice")
library(tidyverse)
library(mice)
library(ggmice)
library(psych)
library(visdat)
#Input Data
ess <- readRDS("Ess round 9.RDS")
Create a function to find the columns full of NAs
#Find the column full of NAs
findNACol <- function(data){
ind_vec <- c()
j <- 1
for (i in 1 : length(data[1, ])) {
if(sum(is.na(data[, i])) == length(data[, i])){
ind_vec[j] <- i
j <- j + 1
}
}
return(ind_vec)
}
Cutting the whole dataset by countries and get rid of NA columns
cutd <- function(data = ess){
cntrynames <- names(table(data$cntry))
num_cntry <- length(cntrynames)
cntrydata_list <- list()
for (k in 1 : num_cntry) {
cntry <- filter(data, cntry == cntrynames[k])
index <- findNACol(cntry)
processed <- cntry[, -index]
cntrydata_list[[k]] <- processed
}
names(cntrydata_list) <- cntrynames
return(cntrydata_list)
}
cntrydatalist <- cutd(ess)
We can see that each country has about 300~340 variables:
summary(cntrydatalist)[, 1:2]
## Length Class
## AT 323 data.frame
## BE 329 data.frame
## BG 323 data.frame
## CH 326 data.frame
## CY 321 data.frame
## CZ 314 data.frame
## DE 335 data.frame
## EE 324 data.frame
## ES 328 data.frame
## FI 323 data.frame
## FR 321 data.frame
## GB 339 data.frame
## HR 337 data.frame
## HU 344 data.frame
## IE 326 data.frame
## IT 321 data.frame
## LT 323 data.frame
## LV 330 data.frame
## ME 335 data.frame
## NL 338 data.frame
## NO 320 data.frame
## PL 335 data.frame
## PT 328 data.frame
## RS 332 data.frame
## SE 324 data.frame
## SI 327 data.frame
## SK 338 data.frame
And now we get a list of each countries dataset and also remove the NA columns.
Here we can draw a barplot of the percentage every countries’ missing values
n_mvec <- matrix(NaN, nrow = 1, ncol = length(cntrydatalist))
for (i in 1 : length(cntrydatalist)) {
n_mvec[i] <- sum(is.na(cntrydatalist[[i]])) / (dim(cntrydatalist[[i]])[1] * dim(cntrydatalist[[i]])[2]) * 100
}
colnames(n_mvec) <- names(cntrydatalist)
barplot(n_mvec, cex.names = 0.5, ylim = c(0, 0.8), main = "The missing value percentage in each country (%)")
Take NL for example, lets take a look of which variables include missing values:
which(colSums(is.na(cntrydatalist$NL))>0)
## eisced wkhct wkhtot eiscedp wkhtotp eiscedf eiscedm inwtm
## 188 216 218 232 251 253 258 331
m_NL_n <- names(which(colSums(is.na(cntrydatalist$NL))>0))
m_NL_n
## [1] "eisced" "wkhct" "wkhtot" "eiscedp" "wkhtotp" "eiscedf" "eiscedm"
## [8] "inwtm"
From the ESS9 codebook, these variables are:
## eisced
## Min. : 1.000
## 1st Qu.: 2.000
## Median : 3.000
## Mean : 4.442
## 3rd Qu.: 6.000
## Max. :55.000
## NA's :2
## wkhct
## Min. : 0.00
## 1st Qu.:24.00
## Median :36.00
## Mean :30.24
## 3rd Qu.:40.00
## Max. :85.00
## NA's :289
## wkhtot
## Min. : 0.00
## 1st Qu.: 24.00
## Median : 36.00
## Mean : 34.67
## 3rd Qu.: 44.00
## Max. :120.00
## NA's :87
## eiscedp
## Min. : 1.000
## 1st Qu.: 2.000
## Median : 3.000
## Mean : 4.643
## 3rd Qu.: 6.000
## Max. :55.000
## NA's :613
## wkhtotp
## Min. : 0.00
## 1st Qu.: 26.00
## Median : 36.00
## Mean : 35.15
## 3rd Qu.: 40.00
## Max. :100.00
## NA's :955
## eiscedf
## Min. : 1.000
## 1st Qu.: 2.000
## Median : 2.000
## Mean : 3.408
## 3rd Qu.: 5.000
## Max. :55.000
## NA's :234
## eiscedm
## Min. : 1.000
## 1st Qu.: 1.000
## Median : 2.000
## Mean : 3.052
## 3rd Qu.: 3.000
## Max. :55.000
## NA's :145
So we have: 4 ordinal variables include missing values and 3 numerical variables include missing values.
# "IE" "AT" "PT" "HU" "PL"
#summary(cntrydatalist$AT$hinctnta)
AT <- cntrydatalist$AT
IE <- cntrydatalist$IE
PT <- cntrydatalist$PT
HU <- cntrydatalist$HU
PL <- cntrydatalist$PL
barplot(table(IE$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in AT")
barplot(table(AT$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in IE")
barplot(table(PT$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in PT")
barplot(table(HU$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in HU")
barplot(table(PL$hinctnta), names.arg=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "Refusal", "Don't Know"),
horiz = F, las = 2, col="#69b3a2", main = "The Distribution of hinctnta in PL")
AT$hinctnta[AT$hinctnta == 88] <- NA
AT$hinctnta[AT$hinctnta == 77] <- NA
IE$hinctnta[IE$hinctnta == 88] <- NA
IE$hinctnta[IE$hinctnta == 77] <- NA
PT$hinctnta[PT$hinctnta == 88] <- NA
PT$hinctnta[PT$hinctnta == 77] <- NA
HU$hinctnta[HU$hinctnta == 88] <- NA
HU$hinctnta[HU$hinctnta == 77] <- NA
PL$hinctnta[PL$hinctnta == 88] <- NA
PL$hinctnta[PL$hinctnta == 77] <- NA
na_mat <- matrix(c(sum(is.na(AT$hinctnta)), sum(is.na(IE$hinctnta)),
sum(is.na(PT$hinctnta)), sum(is.na(HU$hinctnta)), sum(is.na(PL$hinctnta))), 1, 5)
colnames(na_mat) <- c("AT", "IE", "PT", "HU", "PL")
barplot(na_mat, main = "The number of missing values of hinctnta")
per_na_mat <- matrix(c(sum(is.na(AT$hinctnta))/dim(AT)[1], sum(is.na(IE$hinctnta))/dim(IE)[1],
sum(is.na(PT$hinctnta))/dim(PT)[1], sum(is.na(HU$hinctnta))/dim(HU)[1],
sum(is.na(PL$hinctnta))/dim(PL)[1]), 1, 5)
colnames(per_na_mat) <- c("AT", "IE", "PT", "HU", "PL")
barplot(per_na_mat, main = "The proportion of missing values of hinctnta in each country", ylim = c(0, 1.0))
Missing data patterns are not shown as they are uninformative.
To solven missingness in the income variable (hinctnta), we will use multiple imputation. However, since this variable is reported in deciles, imputation will not be straightforward. Ryder et al. (2011) recommend using the midpoint for each class as a surrogate to use for imputation. Furthermore, Donnelly and Pop-Eleches (2018) recommend to use the lower bound of the 10th category plus the width of category 9 as a surrogate for the highest decile. Since deciles differ across countries, this will be done seperately for each country.
# Create decile objects and join for imputation.
# Changing the levels of hinctnta to be the median (the middle of the category) of the deciles
# Austria
AT_deciles <- cbind(1:10, c(7650, 18200, 23400, 28350, 34050, 40150, 47300, 56000, 69050, 94400)) %>%
as.data.frame()
colnames(AT_deciles) <- c("hinctnta", "income")
AT_deciles$income <- as.numeric(AT_deciles$income) # make numeric
AT <- AT %>% left_join(AT_deciles, by = "hinctnta") # add income surrogate
# Ireland
IE_deciles <- cbind(1:10, c(135, 327.50, 447.5, 572.5, 710, 857.5, 1022.5, 1227.5, 1510, 2020)) %>%
as.data.frame()
colnames(IE_deciles) <- c("hinctnta", "income")
IE_deciles$income <- as.numeric(IE_deciles$income) # make numeric
IE <- IE %>% left_join(IE_deciles, by = "hinctnta") # add income surrogate
# Hungary
HU_deciles <- cbind(1:10, c(6500, 149500, 184500, 214500, 244500, 274500, 304500, 339500, 384500, 450000)) %>%
as.data.frame()
colnames(HU_deciles) <- c("hinctnta", "income")
HU_deciles$income <- as.numeric(HU_deciles$income) # make numeric
HU <- HU %>% left_join(HU_deciles, by = "hinctnta") # add income surrogate
# Portugal
PT_deciles <- cbind(1:10, c(2818, 6709, 8847.5, 11265, 13885, 16556.5, 19728, 23948, 30566.5, 44143)) %>%
as.data.frame()
colnames(PT_deciles) <- c("hinctnta", "income")
PT_deciles$income <- as.numeric(PT_deciles$income) # make numeric
PT <- PT %>% left_join(PT_deciles, by = "hinctnta") # add income surrogate
# Poland
PL_deciles <- cbind(1:10, c(850, 2000.5, 3650.5, 3300.5, 3950.5, 4650.5, 5450.5, 6450.5, 7900.5, 10600)) %>%
as.data.frame()
colnames(PL_deciles) <- c("hinctnta", "income")
PL_deciles$income <- as.numeric(PL_deciles$income) # make numeric
PL <- PL %>% left_join(PL_deciles, by = "hinctnta") # add income surrogate
# Clean important variables chosen for the imputation model
# Define missing values and recode variables for the model
# Austria
AT$eisced[AT$eisced == 55] <- NA
AT$eisced <- factor(AT$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
AT$bthcld[AT$bthcld == 1] <- 0
AT$bthcld[AT$bthcld == 2] <- 1
AT$dscrgrp[AT$dscrgrp == 1] <- 0
AT$dscrgrp[AT$dscrgrp == 2] <- 1
AT$bthcld[AT$bthcld != 0 & AT$bthcld != 1] <- NA
AT$maritalb[!(AT$maritalb %in% c(1:6))] <- NA
AT$lrscale[!(AT$lrscale %in% c(0:10))] <- NA
AT$dscrgrp[AT$dscrgrp != 0 & AT$dscrgrp != 1] <- NA
AT$hhmmb[AT$hhmmb %in% c(77, 88)] <- NA
AT$agea[AT$agea == 999] <- NA
AT$bthcld <- as.factor(AT$bthcld)
AT$maritalb <- as.factor(AT$maritalb)
AT$dscrgrp <- as.factor(AT$dscrgrp)
# Hungary
HU$eisced[HU$eisced == 55] <- NA
HU$eisced <- factor(HU$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
HU$bthcld[HU$bthcld == 1] <- 0
HU$bthcld[HU$bthcld == 2] <- 1
HU$dscrgrp[HU$dscrgrp == 1] <- 0
HU$dscrgrp[HU$dscrgrp == 2] <- 1
HU$bthcld[HU$bthcld != 0 & HU$bthcld != 1] <- NA
HU$maritalb[!(HU$maritalb %in% c(1:6))] <- NA
HU$lrscale[!(HU$lrscale %in% c(0:10))] <- NA
HU$dscrgrp[HU$dscrgrp != 0 & HU$dscrgrp != 1] <- NA
HU$hhmmb[HU$hhmmb %in% c(77, 88)] <- NA
HU$agea[HU$agea == 999] <- NA
HU$bthcld <- as.factor(HU$bthcld)
HU$maritalb <- as.factor(HU$maritalb)
HU$dscrgrp <- as.factor(HU$dscrgrp)
# Ireland
IE$eisced[IE$eisced == 55] <- NA
IE$eisced <- factor(IE$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
IE$bthcld[IE$bthcld == 1] <- 0
IE$bthcld[IE$bthcld == 2] <- 1
IE$dscrgrp[IE$dscrgrp == 1] <- 0
IE$dscrgrp[IE$dscrgrp == 2] <- 1
IE$bthcld[IE$bthcld != 0 & IE$bthcld != 1] <- NA
IE$maritalb[!(IE$maritalb %in% c(1:6))] <- NA
IE$lrscale[!(IE$lrscale %in% c(0:10))] <- NA
IE$dscrgrp[IE$dscrgrp != 0 & IE$dscrgrp != 1] <- NA
IE$hhmmb[IE$hhmmb %in% c(77, 88)] <- NA
IE$agea[IE$agea == 999] <- NA
IE$bthcld <- as.factor(IE$bthcld)
IE$maritalb <- as.factor(IE$maritalb)
IE$dscrgrp <- as.factor(IE$dscrgrp)
# Portugal
PT$eisced[PT$eisced == 55] <- NA
PT$eisced <- factor(PT$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
PT$bthcld[PT$bthcld == 1] <- 0
PT$bthcld[PT$bthcld == 2] <- 1
PT$dscrgrp[PT$dscrgrp == 1] <- 0
PT$dscrgrp[PT$dscrgrp == 2] <- 1
PT$bthcld[PT$bthcld != 0 & PT$bthcld != 1] <- NA
PT$maritalb[!(PT$maritalb %in% c(1:6))] <- NA
PT$lrscale[!(PT$lrscale %in% c(0:10))] <- NA
PT$dscrgrp[PT$dscrgrp != 0 & PT$dscrgrp != 1] <- NA
PT$hhmmb[PT$hhmmb %in% c(77, 88)] <- NA
PT$agea[PT$agea == 999] <- NA
PT$bthcld <- as.factor(PT$bthcld)
PT$maritalb <- as.factor(PT$maritalb)
PT$dscrgrp <- as.factor(PT$dscrgrp)
# Poland
PL$eisced[PL$eisced == 55] <- NA
PL$eisced <- factor(PL$eisced, levels = c("1", "2", "3", "4", "5", "6", "7"), ordered = T)
PL$bthcld[PL$bthcld == 1] <- 0
PL$bthcld[PL$bthcld == 2] <- 1
PL$dscrgrp[PL$dscrgrp == 1] <- 0
PL$dscrgrp[PL$dscrgrp == 2] <- 1
PL$bthcld[PL$bthcld != 0 & PL$bthcld != 1] <- NA
PL$maritalb[!(PL$maritalb %in% c(1:6))] <- NA
PL$lrscale[!(PL$lrscale %in% c(0:10))] <- NA
PL$dscrgrp[PL$dscrgrp != 0 & PL$dscrgrp != 1] <- NA
PL$hhmmb[PL$hhmmb %in% c(77, 88)] <- NA
PL$agea[PL$agea == 999] <- NA
PL$bthcld <- as.factor(PL$bthcld)
PL$maritalb <- as.factor(PL$maritalb)
PL$dscrgrp <- as.factor(PL$dscrgrp)
# Create variable vector containing the names of relevant variables
variables <- c("pdwrk", "bthcld", "gndr", "maritalb","lrscale", "dscrgrp", "hhmmb", "agea", "wkhtot", "anweight", "income", "eisced")
# Select subsets of data with relevant variables
AT_sub <- AT %>% select(variables)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(variables)
##
## # Now:
## data %>% select(all_of(variables))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
HU_sub <- HU %>% select(variables)
IE_sub <- IE %>% select(variables)
PT_sub <- PT %>% select(variables)
PL_sub <- PL %>% select(variables)
# Interactions with anweight for Portugal
PT_sub$anweight_pdwrk <- PT_sub$anweight * PT_sub$pdwrk
PT_sub$anweight_bthcld <- PT_sub$anweight * as.numeric(PT_sub$bthcld)
PT_sub$anweight_lrscale <- PT_sub$anweight * PT_sub$lrscale
PT_sub$anweight_dscrgrp <- PT_sub$anweight * as.numeric(PT_sub$dscrgrp)
PT_sub$anweight_hhmmb <-PT_sub$anweight * PT_sub$hhmmb
PT_sub$anweight_agea <- PT_sub$anweight * PT_sub$agea
PT_sub$anweight_wkhtot <- PT_sub$anweight * PT_sub$wkhtot
PT_sub$anweight_income <- PT_sub$anweight *PT_sub$income
PT_sub$anweight_eisced <- PT_sub$anweight * as.numeric(PT_sub$eisced)
# Interactions with anweight for Austria
AT_sub$anweight_pdwrk <- AT_sub$anweight * AT_sub$pdwrk
AT_sub$anweight_bthcld <- AT_sub$anweight * as.numeric(AT_sub$bthcld)
AT_sub$anweight_lrscale <- AT_sub$anweight * AT_sub$lrscale
AT_sub$anweight_dscrgrp <- AT_sub$anweight * as.numeric(AT_sub$dscrgrp)
AT_sub$anweight_hhmmb <- AT_sub$anweight * AT_sub$hhmmb
AT_sub$anweight_agea <- AT_sub$anweight * AT_sub$agea
AT_sub$anweight_wkhtot <- AT_sub$anweight * AT_sub$wkhtot
AT_sub$anweight_income <- AT_sub$anweight * AT_sub$income
AT_sub$anweight_eisced <- AT_sub$anweight * as.numeric(AT_sub$eisced)
# Interactions with anweight for Hungary
HU_sub$anweight_pdwrk <- HU_sub$anweight * HU_sub$pdwrk
HU_sub$anweight_bthcld <- HU_sub$anweight * as.numeric(HU_sub$bthcld)
HU_sub$anweight_lrscale <- HU_sub$anweight * HU_sub$lrscale
HU_sub$anweight_dscrgrp <-HU_sub$anweight * as.numeric(HU_sub$dscrgrp)
HU_sub$anweight_hhmmb <- HU_sub$anweight * HU_sub$hhmmb
HU_sub$anweight_agea <- HU_sub$anweight * HU_sub$agea
HU_sub$anweight_wkhtot <- HU_sub$anweight * HU_sub$wkhtot
HU_sub$anweight_income <- HU_sub$anweight * HU_sub$income
HU_sub$anweight_eisced <- HU_sub$anweight * as.numeric(HU_sub$eisced)
# Interactions with anweight for Ireland
IE_sub$anweight_pdwrk <- IE_sub$anweight * IE_sub$pdwrk
IE_sub$anweight_bthcld <- IE_sub$anweight * as.numeric(IE_sub$bthcld)
IE_sub$anweight_lrscale <- IE_sub$anweight * IE_sub$lrscale
IE_sub$anweight_dscrgrp <- IE_sub$anweight * as.numeric(IE_sub$dscrgrp)
IE_sub$anweight_hhmmb <- IE_sub$anweight * IE_sub$hhmmb
IE_sub$anweight_agea <- IE_sub$anweight * IE_sub$agea
IE_sub$anweight_wkhtot <- IE_sub$anweight * IE_sub$wkhtot
IE_sub$anweight_income <- IE_sub$anweight * IE_sub$income
IE_sub$anweight_eisced <- IE_sub$anweight * as.numeric(IE_sub$eisced)
# Interactions with anweight for Poland
PL_sub$anweight_pdwrk <- PL_sub$anweight * PL_sub$pdwrk
PL_sub$anweight_bthcld <- PL_sub$anweight * as.numeric(PL_sub$bthcld)
PL_sub$anweight_lrscale <- PL_sub$anweight * PL_sub$lrscale
PL_sub$anweight_dscrgrp <- PL_sub$anweight * as.numeric(PL_sub$dscrgrp)
PL_sub$anweight_hhmmb <- PL_sub$anweight * PL_sub$hhmmb
PL_sub$anweight_agea <- PL_sub$anweight * PL_sub$agea
PL_sub$anweight_wkhtot <- PL_sub$anweight * PL_sub$wkhtot
PL_sub$anweight_income <- PL_sub$anweight * PL_sub$income
PL_sub$anweight_eisced <- PL_sub$anweight * as.numeric(PL_sub$eisced)
# Create methods for imputation models (this is the same for every country)
meth <- make.method(AT_sub)
# Predictor matrix for Portugal
PT_pred <- quickpred(PT_sub)
PT_pred[, 'income'] <- 1
PT_pred['income', 'income'] <- 0
# Predictor matrix for Austria
AT_pred <- quickpred(AT_sub)
AT_pred[, 'income'] <- 1
AT_pred['income', 'income'] <- 0
# Predictor matrix for Ireland
IE_pred <- quickpred(IE_sub)
IE_pred[, 'income'] <- 1
IE_pred['income', 'income'] <- 0
# Predictor matrix Hungary
HU_pred <- quickpred(HU_sub)
HU_pred[, 'income'] <- 1
HU_pred['income', 'income'] <- 0
# Predictor matrix for Poland
PL_pred <- quickpred(PL_sub)
PL_pred[, 'income'] <- 1
PL_pred['income', 'income'] <- 0
# Imputation for Portugal
vis_miss(PT_sub) # Imputing m sets according to amount of percentage missing in income
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
## Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.
PT_imp <- mice(PT_sub,
m = 20,
maxit = 10,
method = meth,
predictorMatrix = PT_pred,
seed = 12345,
print = FALSE)
## Warning: Number of logged events: 2600
# Imputation for Poland
vis_miss(PL_sub) # Imputing m sets according to amount of percentage missing in income
PL_imp <- mice(PL_sub,
m = 39,
maxit = 10,
method = meth,
predictorMatrix = PL_pred,
seed = 12345,
print = FALSE)
# Imputation for Hungary
vis_miss(HU_sub) # Imputing m sets according to amount of percentage missing in income
HU_imp <- mice(HU_sub,
m = 40,
maxit = 10,
method = meth,
predictorMatrix = HU_pred,
seed = 12345,
print = FALSE)
# Imputation for Ireland
vis_miss(IE_sub) # Imputing m sets according to amount of percentage missing in income
IE_imp <- mice(IE_sub,
m = 28,
maxit = 10,
method = meth,
predictorMatrix = IE_pred,
seed = 12345,
print = FALSE)
# Imputation for Austria
vis_miss(AT_sub) # Imputing m sets according to amount of percentage missing in income
AT_imp <- mice(AT_sub,
m = 18,
maxit = 10,
method = meth,
predictorMatrix = AT_pred,
seed = 12345,
print = FALSE)
# Checking convergence for Portugal
plot(PT_imp)
densityplot(PT_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
## ggplot2::geom_density()
##
## See amices.org/ggmice for more info.
bwplot(PT_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
## ggplot2::geom_boxplot()
##
## See amices.org/ggmice for more info.
# Outcome is midpoint of the median income class
PT_imp %>%
complete('long') %>%
with(tapply(income, .imp, median)) %>%
median()
## [1] 13885
# Checking convergence for Poland
plot(PL_imp)
densityplot(PL_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
## ggplot2::geom_density()
##
## See amices.org/ggmice for more info.
bwplot(PL_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
## ggplot2::geom_boxplot()
##
## See amices.org/ggmice for more info.
# Outcome is midpoint of the median income class
PL_imp %>%
complete('long') %>%
with(tapply(income, .imp, median)) %>%
median()
## [1] 3950.5
# Checking convergence for Ireland
plot(IE_imp)
densityplot(IE_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
## ggplot2::geom_density()
##
## See amices.org/ggmice for more info.
bwplot(IE_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
## ggplot2::geom_boxplot()
##
## See amices.org/ggmice for more info.
# Outcome is midpoint of the median income class
IE_imp %>%
complete('long') %>%
with(tapply(income, .imp, median)) %>%
median()
## [1] 572.5
# Checking convergence for Hungary
plot(HU_imp)
densityplot(HU_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
## ggplot2::geom_density()
##
## See amices.org/ggmice for more info.
bwplot(HU_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
## ggplot2::geom_boxplot()
##
## See amices.org/ggmice for more info.
# Outcome is midpoint of the median income class
HU_imp %>%
complete('long') %>%
with(tapply(income, .imp, median)) %>%
median()
## [1] 214500
# Checking convergence for Austria
plot(AT_imp)
densityplot(AT_imp)[4]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = my_vrb, group = .imp)) +
## ggplot2::geom_density()
##
## See amices.org/ggmice for more info.
bwplot(AT_imp)[8]
## Hint: Did you know, an equivalent figure can be created with `ggmice()`?
## For example, to plot a variable named 'my_vrb' from a mids object called 'my_mids', run:
##
## ggmice(my_mids, ggplot2::aes(x = .imp, y = my_vrb)) +
## ggplot2::geom_boxplot()
##
## See amices.org/ggmice for more info.
# Outcome is midpoint of the median income class
AT_imp %>%
complete('long') %>%
with(tapply(income, .imp, median)) %>%
median()
## [1] 34050
Donnelly, M. J., & Pop-Eleches, G. (2018). Income measures in cross-national surveys: problems and solutions. Political Science Research and Methods, 6(2), 355-363.